## *******************************
##    B14 Implied Effect on Growth     
##       Hao Zhang & Ye Zhang
##             2025/7/24
## *******************************

library(readstata13)
library(lfe)
library(stargazer)
library(tidyverse)
library(doBy)
library(data.table)
library(DescTools)

# set working directory
setwd("../raw data")

# read in data in the CIES folder
options(scipen = 999)
firm <- read.dta13("cies 1998-2007.dta") %>% 
  mutate(citycode = as.numeric(substr(as.character(city), 0, 4)))

# create ownership types
firm$ownership <- as.numeric(firm$ownership)
firm <- firm %>% mutate(soe = ifelse(ownership == 110 | ownership == 151, 1, 0),
                        hmt = ifelse(ownership > 199 & ownership < 299, 1, 0),
                        fie = ifelse(ownership > 299, 1, 0), 
                        private = ifelse(soe == 0 & hmt == 0 & fie == 0, 1, 0))

# create insurance dummy
firm <- firm %>% filter(insurance >= 0) %>% 
  mutate(dummy1 = ifelse(insurance == 0, 0, 1)) %>%
  mutate(dummy2 = ifelse(medical == 0, 0, 1))

# create total_insurance and insurance rate
firm_base1 <- firm %>% filter(year >= 2003, year <= 2006) %>% 
  select(citycode, year, frdm, wage) %>% filter(wage > 0) %>%
  mutate(year = year + 1)
firm_base2 <- firm %>% 
  select(citycode, year, frdm, wage) %>% filter(wage > 0) %>%
  mutate(year = year + 1)
firm2004 <- firm %>% filter(year >= 2004) %>%
  filter(insurance >= 0, medical >= 0) %>% 
  mutate(total_insurance = insurance + medical) %>%
  mutate(dummy2 = ifelse(medical == 0, 0, 1))
firm <- left_join(firm, firm_base2, by = c("citycode", "year", "frdm"), relationship = "many-to-many")
firm2004 <- left_join(firm2004, firm_base1, by = c("citycode", "year", "frdm"), relationship = "many-to-many")
firm <- firm %>% mutate(rate = insurance/wage.y)
firm2004 <- firm2004 %>% mutate(rate = medical/wage.y)
firm$rate <- Winsorize(firm$rate, quantile(firm$rate, probs = c(0.05, 0.95), na.rm = TRUE))
firm2004$rate <- Winsorize(firm2004$rate, quantile(firm2004$rate, probs = c(0.05, 0.95), na.rm = TRUE))

# clean city panel and city leadership
city <- read.dta13("city panel 1995-2011.dta")
city <- city %>% mutate(province = as.integer(citycode/100)) %>% 
  group_by(province, year) %>%
  mutate(gdpgrowth_mean = mean(gdpgrowth, na.rm = TRUE)) %>% 
  mutate(year = year + 1) %>% mutate(gdp_pressure = gdpgrowth_mean - gdpgrowth)
mayor <- read.dta13("city mayor 1990-2015.dta") %>% 
  select(citycode, year, code, begin, end, educ, ethnicity, gender, birth_year, seq)

# add tax revenue and other city level variables
city1 <- readxl::read_xlsx("city panel 1996-2014.xlsx") # warnings due to conversion from text to number for the year variable for districts and provinces
citycode <- readxl::read_xlsx("citycode.xlsx") %>%
  mutate(citycode = as.integer(citycode/100))
city1 <- left_join(city1, citycode, by = "city", relationship = "many-to-many") %>% distinct() %>%
  filter(year >= 1999 & year <= 2007)
city <- left_join(city, city1, by = c("citycode", "year"))

# run inverse hyperbolic sine
ihs <- function(x) {
  y <- log(x + sqrt(x ^ 2 + 1))
  return(y)
}

# create new firm indicator
new <- subset(firm, establishment == year)
new$count <- 1

# lag firm variable
firm$year <- firm$year + 1

# test with city panels - unemployment
noinsurance <- summaryBy(dummy1 ~ citycode + year, data = firm, FUN = mean, na.rm=TRUE)
profit <- summaryBy(profit ~ citycode + year, data = firm, FUN = sum, na.rm=TRUE)
number_new <- summaryBy(count ~ citycode + year, data = new, FUN = sum, na.rm=TRUE)
noinsurance <- left_join(noinsurance, profit, by = c("citycode", "year"))
noinsurance <- left_join(noinsurance, city, by = c("citycode", "year"))
noinsurance <- left_join(noinsurance, mayor, by = c("citycode", "year"))
noinsurance <- left_join(noinsurance, number_new, by = c("citycode", "year"))
noinsurance <- noinsurance %>% filter(citycode != 1100, citycode != 1200, 
                                      citycode != 3100, citycode != 5102)
noinsurance <- noinsurance %>% filter(gdpgrowth <= 0.6 & gdpgrowth >= -0.2) %>% 
  mutate(capacity = (tax_revenue*100000 - profits)/gdp/10, deficit = expenditure - revenue_allcity, unemp_rate = unemployment/population)

# control for institutions and run regressions
noinsurance <- noinsurance %>% mutate(province = as.integer(citycode/100)) %>% 
  filter(year >= 2001) %>%
  mutate(tax = ifelse(province == 33 & year >= 2004 | province == 13 & year >= 2002 | 
                        province == 42 & year >= 2002 | province == 44 & year >= 2002 | 
                        province == 32 & year >= 2005 | province == 53 & year >= 2006, 1, 0))

noinsurance$count.sum[is.na(noinsurance$count.sum) == TRUE] <- 0

m1 <- felm(gdpgrowth ~ dummy1.mean | 
             citycode + year + educ + ethnicity  + gender + begin + connection +
             seq + tax | 0 | citycode, data = noinsurance)

m2 <- felm(gdpgrowth ~ dummy1.mean + log(gdp) + log(gdp/population) + ihs(count.sum) +
             log(FDI + 1) + ihs(deficit/gdp) + ihs(capacity) + tax | 
             citycode + year + educ + ethnicity  + gender + begin + connection + 
             seq | 0 | citycode, data = noinsurance)

stargazer(m1, m2, title = "Social Insurance and GDP Growth (Coverage)", 
          dep.var.labels = c("GDP Growth"),
          omit.stat = c("rsq", "adj.rsq", "ser"), 
          font.size = "small", 
          add.lines = list(c("Mayor Characteristics", "Y", "Y", "Y", "Y"),
                           c("City Fixed Effects", "Y", "Y", "Y", "Y"), 
                           c("Year Fixed Effects", "Y", "Y", "Y", "Y")))
